***************************************************************
* Afrobarometer R9 x ACLED – Cleaned, Replicable Pipeline
* (copy–paste into a .do file)
***************************************************************
//--------------------------------------------------------------
// 01_clean.do — Replication cleaning script
// Project: "The Impact of Terrorism on Democratic Support in Africa"
//--------------------------------------------------------------
* Session setup: pin the interpreter version, start from an empty session,
* switch off output paging and turn on return messages after each command.
version 18.0
clear all
set rmsg on
set more off
* capture: do not abort the run if the plotplain scheme cannot be set
capture set scheme plotplain

* --------------------------------------------------------------
* Project root (derived from the current working directory)
* --------------------------------------------------------------
* Run this file either from the project root or from inside its /dofile
* subfolder; in both cases ${root} ends up pointing at the project root.
*
* Why not c(filename): it holds the dataset last named in -use-/-save-,
* not the path of the running do-file, and ": dirname" is not one of
* Stata's extended macro functions, so the earlier approach could not work.

* Normalise Windows separators so the suffix test below is platform-neutral
local here = subinstr(c(pwd), "\", "/", .)

* Drop a single trailing separator, if any
if substr("`here'", -1, 1) == "/" & length("`here'") > 1 {
    local here = substr("`here'", 1, length("`here'") - 1)
}

* If launched from inside /dofile, the project root is its parent folder.
* The leading "/" is part of the test so that a folder such as "mydofile"
* is not mistaken for the do-file folder, and it is removed together with
* the folder name so that ${root} never ends in a separator.
if substr("`here'", -7, 7) == "/dofile" {
    local here = substr("`here'", 1, length("`here'") - 7)
}

global root "`here'"
display as text "Project root: ${root}"


*--------------------------------------------------------------
* 0. PATHS (relative; no machine-specific paths)
*--------------------------------------------------------------

* Inputs and code
global data      "${root}/Original data"
global do        "${root}/dofile"

* Outputs (everything lives below ${out})
global out       "${root}/output"
global table     "${out}/tables"
global graph     "${out}/graphs"
global data_new  "${out}/Generated data"

* Create the output folders. ${out} comes first because the other three
* are nested inside it; capture keeps the run going when mkdir fails
* (for example because the folder already exists).
foreach folder in "${out}" "${table}" "${graph}" "${data_new}" {
    capture mkdir "`folder'"
}


* Close every log that is still open, then start a fresh plain-text log
* under the name "rep" (overwriting the log of any previous run)
capture log close _all
log using "${out}/01_clean.log", name(rep) replace text


//global data   "data/"
//global graph  "graph/"
//global table  "tables/"


***************************************************************
* 1. AFROBAROMETER R9 – DATES, TIME VAR, DEMOGRAPHICS, OUTCOMES
***************************************************************

use "${data}/Original_data_R9.dta", clear

*--------------------------------------------------------------
* 1.1 Interview date and time within country
*--------------------------------------------------------------

* Working copy of the interview date, displayed as a daily date
* NOTE(review): assumes DATEINTR is already a numeric daily date - confirm
generate intervdate = DATEINTR
label variable intervdate "Interview date (numeric Stata date)"
format %td intervdate

* Fieldwork window over all countries pooled; the two scalars are used
* again further down to restrict the ACLED events
summarize intervdate if !missing(intervdate)
scalar field_end   = r(max)
scalar field_start = r(min)

display "Fieldwork start: " %td field_start
display "Fieldwork end:   " %td field_end

* Fieldwork day counter: equals 1 on each country's earliest interview date
egen min_intervdate = min(intervdate), by(COUNTRY)
generate time_interviewed = intervdate - min_intervdate + 1 if !missing(intervdate)
label variable time_interviewed "Day of fieldwork within country (1 = first day)"
drop min_intervdate

*--------------------------------------------------------------
* 1.2 DEMOGRAPHIC VARIABLES
*--------------------------------------------------------------

* Age (Q1): set non-substantive codes to missing, then copy into -age-
tab Q1, nolabel  // Check numeric codes for "Refused" and "Don't know"

* 998 / 999 are the refusal / don't-know codes checked above.
* -1 is blanked as well so that age is handled like Q95, Q84A and Q13
* elsewhere in this file; otherwise a -1 would survive as an "age" of -1.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q1
replace Q1 = . if inlist(Q1, -1, 998, 999)
clonevar age = Q1

* Gender indicator from Q100: code 1 becomes 0 (Male), code 2 becomes 1 (Female)
recode Q100 (2 = 1) (1 = 0), generate(gender)
label variable gender "Gender of respondent"
label define gender_lbl 0 "Male" 1 "Female"
label values gender gender_lbl

* Race (Q101) collapsed to six categories; 9999 becomes missing
recode Q101 ///
    (1 = 1)     /// Black / African
    (2 = 2)     /// White / European
    (3 = 3)     /// Coloured / Mixed race
    (4 = 4)     /// Arab / Lebanese / North African
    (5 6 = 5)   /// South Asian and East Asian share one category
    (9995 = 6)  /// Other
    (9999 = .), generate(race)

label variable race "Race of respondent (grouped)"
label define race_lbl ///
    1 "Black / African" ///
    2 "White / European" ///
    3 "Coloured / Mixed race" ///
    4 "Arab / Lebanese / North African" ///
    5 "Asian (South & East Asian)" ///
    6 "Other"
label values race race_lbl

* Three-way version: categories 3 to 6 are pooled into "Others"
recode race (3/6 = 3) (2 = 2) (1 = 1), generate(race_group)
label define race_group_lbl ///
    1 "Black / African" ///
    2 "White / European" ///
    3 "Others", replace
label values race_group race_group_lbl
//tab race


* Religion (grouped) from Q95
* The numeric grouping is unchanged; only the value labels are corrected so
* that they describe what each group actually contains:
*   - group 2 holds only codes 1-4 (Protestants are in group 3), so it is
*     no longer labelled "all groups";
*   - group 5 pools "None" (code 0) with the other/unclassified codes.
recode Q95 ///
    (18/24 = 1) /// Muslims (all)
    (1/4 = 2) /// Christian (unspecified + Catholic/Orthodox/Coptic)
    (5/17 30/33 100 420 500 740 780 820 821 822 823 = 3) /// Protestant / other Christian
    (25 = 4) /// Traditional/Ethnic religion
    (0 26/29 34 9994 9995 = 5) /// None, others, agnostic/atheist, not asked
    (-1 9998 9999 = .), gen(religion)

label define religion_lbl ///
    1 "Muslim (all groups)" ///
    2 "Christian (unspecified/Catholic/Orthodox/Coptic)" ///
    3 "Protestant / other Christian" ///
    4 "Traditional/Ethnic religion" ///
    5 "None / other"

label values religion religion_lbl
label var religion "Religious affiliation (grouped)"
tab religion

* Urban-rural split from URBRUR: code 1 stays 1 (Urban); codes 2 and 3 are
* both assigned to 2 (Rural)
recode URBRUR (2 3 = 2) (1 = 1), generate(urban_rural)
label variable urban_rural "Urban-Rural"
label define urban_rural_lbl 1 "Urban" 2 "Rural"
label values urban_rural urban_rural_lbl

* Ethnic group (Q84A): substantive codes are kept as they are; the listed
* non-substantive codes are turned into missing
recode Q84A (-1 = .) (9990 = .) (9994 9995 = .) (9998 9999 = .), generate(ethnic)
label variable ethnic "Ethnic group"

* Education (recoded + broader groups) from Q94
* Codes 0-9 are kept unchanged (recode copies values no rule mentions);
* 98 and 99 are set to missing. -1 is now set to missing too: without that
* rule recode would copy -1 through, and it would then show up as an
* unlabeled extra category in both recoded_education and educ_group.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q94
recode Q94 (-1 98 99 = .), gen(recoded_education)

label define education_lbl ///
    0 "No formal schooling" ///
    1 "Informal schooling only" ///
    2 "Some primary schooling" ///
    3 "Primary school completed" ///
    4 "Some secondary / high school" ///
    5 "Secondary / high school completed" ///
    6 "Post-secondary, non-university" ///
    7 "Some university" ///
    8 "University completed" ///
    9 "Post-graduate"

label values recoded_education education_lbl
label var recoded_education "Re-coded Education Levels"

* Broader education groups (four levels)
recode recoded_education (0 1 2 = 1) (3 4 = 2) (5 6 7 = 3) (8 9 = 4), gen(educ_group)
label define educ_group_lbl ///
    1 "Below primary" ///
    2 "Primary" ///
    3 "Secondary" ///
    4 "University"
label values educ_group educ_group_lbl
label var educ_group "Education level"
tab educ_group

* Employment status from Q93A
* Codes 0-3 are kept unchanged; 8, 9 and 9994 are set to missing. -1 is now
* set to missing as well, otherwise recode would pass it through and it
* would appear as an unlabeled category in emp_group.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q93A
recode Q93A (-1 8 9 9994 = .), gen(recoded_employment)

label define employment_status_lbl ///
    0 "No (not looking)" ///
    1 "No (looking)" ///
    2 "Yes, part time" ///
    3 "Yes, full time"

label values recoded_employment employment_status_lbl
label var recoded_employment "Re-coded Employment Status"

* Broader employment groups: both "No" answers are pooled
recode recoded_employment (0 1 = 1) (2 = 2) (3 = 3), gen(emp_group)
label define emp_group_lbl ///
    1 "Not employed" ///
    2 "Part-time" ///
    3 "Full-time"
label values emp_group emp_group_lbl
label var emp_group "Employment status"
tab emp_group

* Safety walking in neighbourhood – Q7A
* Codes 0-4 are kept unchanged; 8 and 9 are set to missing. -1 is now set
* to missing as well, otherwise it would pass through recode and end up as
* an unlabeled category in safety_group / safety.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q7A
recode Q7A (-1 8 9 = .), gen(recoded_Q7A)

label define unsafe_walk_lbl ///
    0 "Never" ///
    1 "Just once or twice" ///
    2 "Several times" ///
    3 "Many times" ///
    4 "Always"
label values recoded_Q7A unsafe_walk_lbl
label var recoded_Q7A "Feeling unsafe walking in neighbourhood"

* Three-level grouping of the frequency scale
recode recoded_Q7A (0 1 = 1) (2 = 2) (3 4 = 3), gen(safety_group)
label define safety_group_lbl ///
    1 "Never or rarely" ///
    2 "Occasionally" ///
    3 "Frequently"
label values safety_group safety_group_lbl
label var safety_group "Safety Status"

* Plain numeric copy of safety_group (no value label attached)
gen safety = safety_group

* Fear of crime at home – Q7B
* Codes 0-4 are kept unchanged; 8 and 9 are set to missing. -1 is now set
* to missing as well, otherwise it would pass through recode and end up as
* an unlabeled category in fearing_crime.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q7B
recode Q7B (-1 8 9 = .), gen(recoded_Q7B)

label define fear_crime_home_lbl ///
    0 "Never" ///
    1 "Just once or twice" ///
    2 "Several times" ///
    3 "Many times" ///
    4 "Always"
label values recoded_Q7B fear_crime_home_lbl
label var recoded_Q7B "Fearing crime at home (original categories)"

* Three-level grouping of the frequency scale
recode recoded_Q7B (0 1 = 1) (2 = 2) (3 4 = 3), gen(fearing_crime)
label define fearing_crime_lbl ///
    1 "Never or rarely" ///
    2 "Occasionally" ///
    3 "Frequently"
label values fearing_crime fearing_crime_lbl
label var fearing_crime "Fearing Crime at Home"
tab fearing_crime

* Discussing politics – Q8
* Codes 0-2 are kept unchanged; 8 and 9 are set to missing. -1 is now set
* to missing as well, otherwise it would pass through recode and end up as
* an unlabeled category in discuss_politics.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q8
recode Q8 (-1 8 9 = .), gen(recoded_Q8)

label define politics_lbl ///
    0 "Never" ///
    1 "Occasionally" ///
    2 "Frequently"
label values recoded_Q8 politics_lbl
label var recoded_Q8 "Frequency of discussing politics (original)"

* Same three categories under the analysis name, with longer labels
recode recoded_Q8 (0 = 0) (1 = 1) (2 = 2), gen(discuss_politics)
label define discuss_politics_lbl ///
    0 "Never discuss politics" ///
    1 "Occasionally discuss politics" ///
    2 "Frequently discuss politics"
label values discuss_politics discuss_politics_lbl
label var discuss_politics "Frequency of discussing politics (grouped)"

* EA-level infrastructure (EA_FAC_C, EA_SEC_B, EA_SVC_B)
* Step 1: blank out non-substantive codes in the source variables.
* NOTE(review): -1 is treated as missing here like elsewhere in this file
* (Q95, Q84A, Q13) - confirm against the codebook
foreach v of varlist EA_FAC_C EA_SEC_B EA_SVC_B {
    replace `v' = . if inlist(`v', -1, 9)
}

* Step 2: 0/1 indicators. The "if !missing()" qualifier matters: a bare
* (x == 1) evaluates to 0 when x is missing, which would silently turn the
* values blanked in step 1 into "facility absent" instead of "unknown".
gen police_station = (EA_FAC_C == 1) if !missing(EA_FAC_C)
gen soldiers_army  = (EA_SEC_B == 1) if !missing(EA_SEC_B)
gen piped_water    = (EA_SVC_B == 1) if !missing(EA_SVC_B)

label var police_station "Police station in town/village"
label var soldiers_army  "Soldiers/army in town/village"
label var piped_water    "Piped water system in town/village"

***************************************************************
* 1.3 OUTCOMES – DEMOCRACY, AUTHORITARIAN ALTERNATIVES, ETC.
***************************************************************

* Support for democracy – Q23
* Codes 1-3 are kept unchanged; 8 and 9 are set to missing. -1 is now set
* to missing as well, otherwise it would pass through recode and end up as
* an unlabeled category in support_democracy_group.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q23
recode Q23 (-1 8 9 = .), gen(recoded_Q23)

label define democracy_support_lbl ///
    1 "It doesn't matter what kind of government" ///
    2 "Non-democratic government can be preferable" ///
    3 "Democracy is preferable"
label values recoded_Q23 democracy_support_lbl
label var recoded_Q23 "Support for democracy (original)"

* Binary version: 2 = democracy preferable, 1 = either other answer
recode recoded_Q23 (1 2 = 1) (3 = 2), gen(support_democracy_group)
label define support_democracy_group_lbl ///
    1 "Against democracy or indifferent" ///
    2 "Support democracy"
label values support_democracy_group support_democracy_group_lbl
label var support_democracy_group "Support for democracy (grouped)"

* Extent of democracy – Q30
* Codes 1-4 are kept unchanged; 8, 9 and 98 are set to missing. -1 is now
* set to missing as well, otherwise it would pass through recode and end up
* as an unlabeled category in democracy_group.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q30
recode Q30 (-1 8 9 98 = .), gen(recoded_Q30)

label define democracy_extent_lbl ///
    1 "Not a democracy" ///
    2 "A democracy with major problems" ///
    3 "A democracy with minor problems" ///
    4 "A full democracy"
label values recoded_Q30 democracy_extent_lbl
label var recoded_Q30 "Extent of democracy (original)"

* Three-level version: the two "with problems" answers are pooled
recode recoded_Q30 (1 = 1) (2 3 = 2) (4 = 3), gen(democracy_group)
label define democracy_group_lbl ///
    1 "Not a democracy" ///
    2 "Democracy with problems" ///
    3 "Full democracy"
label values democracy_group democracy_group_lbl
label var democracy_group "Extent of democracy (grouped)"


* Authoritarian alternatives – Q22A/B/C
* Each item is cleaned the same way: codes 1-5 (strongly disapprove ...
* strongly approve) are kept, 8 and 9 become missing, and -1 now becomes
* missing too. Without the -1 rule the value would pass through both
* recodes and be averaged into the composite built from the three
* rejection_* variables.
* NOTE(review): confirm against the codebook that -1 is a missing-data code for Q22A-C
*
* The grouped versions reverse the direction of the scale:
* 3 = reject (disapprove), 2 = neutral, 1 = support (approve).

* --- Q22B: military rule ---
recode Q22B (-1 8 9 = .), gen(recoded_Q22B)

label define military_rule_rejection_lbl ///
    1 "Strongly disapprove" ///
    2 "Disapprove" ///
    3 "Neither approve nor disapprove" ///
    4 "Approve" ///
    5 "Strongly approve"
label values recoded_Q22B military_rule_rejection_lbl
label var recoded_Q22B "Rejection of military rule (original)"

recode recoded_Q22B (1 2 = 3) (3 = 2) (4 5 = 1), gen(rejection_military_rule)
label define rejection_military_rule_lbl ///
    1 "Support military rule" ///
    2 "Neutral" ///
    3 "Reject military rule"
label values rejection_military_rule rejection_military_rule_lbl
label var rejection_military_rule "Rejection of military rule (grouped)"

* --- Q22A: one-party rule ---
recode Q22A (-1 8 9 = .), gen(recoded_Q22A)

label define one_party_rule_rejection_lbl ///
    1 "Strongly disapprove" ///
    2 "Disapprove" ///
    3 "Neither approve nor disapprove" ///
    4 "Approve" ///
    5 "Strongly approve"
label values recoded_Q22A one_party_rule_rejection_lbl
label var recoded_Q22A "Rejection of one-party rule (original)"

recode recoded_Q22A (1 2 = 3) (3 = 2) (4 5 = 1), gen(rejection_one_party_rule)
label define rejection_one_party_rule_lbl ///
    1 "Support one-party rule" ///
    2 "Neutral" ///
    3 "Reject one-party rule"
label values rejection_one_party_rule rejection_one_party_rule_lbl
label var rejection_one_party_rule "Rejection of one-party rule (grouped)"

* --- Q22C: one-man rule ---
recode Q22C (-1 8 9 = .), gen(recoded_Q22C)

label define one_man_rule_rejection_lbl ///
    1 "Strongly disapprove" ///
    2 "Disapprove" ///
    3 "Neither approve nor disapprove" ///
    4 "Approve" ///
    5 "Strongly approve"
label values recoded_Q22C one_man_rule_rejection_lbl
label var recoded_Q22C "Rejection of one-man rule (original)"

recode recoded_Q22C (1 2 = 3) (3 = 2) (4 5 = 1), gen(rejection_one_man_rule)
label define rejection_one_man_rule_lbl ///
    1 "Support one-man rule" ///
    2 "Neutral" ///
    3 "Reject one-man rule"
label values rejection_one_man_rule rejection_one_man_rule_lbl
label var rejection_one_man_rule "Rejection of one-man rule (grouped)"

* Voting – Q13
* Only two answers are retained: code 0 -> 0 (did not vote) and
* code 3 -> 1 (voted). Codes 1, 2, -1, 8 and 9 are all set to missing.
recode Q13 ///
    (3 = 1) ///
    (0 = 0) ///
    (-1 1 2 8 9 = .), generate(recoded_Q13)

label variable recoded_Q13 "Voting in last national election"
label define voting_lbl 0 "Did not vote" 1 "Voted"
label values recoded_Q13 voting_lbl

* Analysis copy under a shorter name, sharing the same value label
recode recoded_Q13 (1 = 1) (0 = 0), generate(voted)
label variable voted "Voting status: voted (1) vs not (0)"
label values voted voting_lbl


* Composite: row mean of the three grouped rejection items (each coded
* 1 = support, 2 = neutral, 3 = reject). Despite the name auth_support,
* higher values therefore mean stronger rejection, as the label says.
* rowmean() averages over the items that are non-missing for a respondent.
egen auth_support = rowmean(rejection_military_rule rejection_one_party_rule rejection_one_man_rule)
label variable auth_support "Rejection of authoritarian alternatives"

* Convenience copies for democracy outcomes (plain numeric, no value labels)
generate demo_rated   = democracy_group
generate demo_support = support_democracy_group

***************************************************************
* 1.4 IDs AND STRINGS FOR MATCHING (R9)
***************************************************************

* Respondent ID = current row number
generate id1 = _n

* Country name as text (taken from the value label of COUNTRY), then both
* match strings are lower-cased and stripped of leading/trailing blanks
decode COUNTRY, generate(country1)
replace country1  = trim(lower(country1))
replace locality1 = trim(lower(locality1))

* Match key: "<locality> <country>"
generate locality_country1 = locality1 + " " + country1

save "${data_new}/R9_c.dta", replace

***************************************************************
* 2. ACLED – DATES, TIME_EXPOSED, LOCALITY STRINGS
***************************************************************

use "${data}/Original_data_acled.dta", clear

* Event date as numeric
* NOTE(review): the "YMD" mask assumes event_date is stored year-first;
* any value that does not parse becomes missing - confirm with -count if missing(eventdate)-
gen eventdate = date(event_date, "YMD")
format eventdate %td
label var eventdate "Event date (numeric Stata date)"

* Time since first event within country
* (assuming 'country' is string)
egen min_eventdate = min(eventdate), by(country)
gen time_exposed = eventdate - min_eventdate + 1 if !missing(eventdate)
label var time_exposed "Day of violence timeline within country (1 = first event)"

* IDs and cleaned locality/country
gen id2 = _n
gen country2 = trim(lower(country))

* Clean the locality string exactly as the R9 side cleans locality1
* (lower case, no leading/trailing blanks). Without this the two match
* keys differ in case and padding, which lowers the similarity scores
* for localities that are in fact identical.
replace locality2 = trim(lower(locality2))

* Combined locality + country
gen locality_country2 = locality2 + " " + country2
save "${data_new}/acled_c.dta", replace

***************************************************************
* 3. FUZZY MATCHING – matchit ON locality+country
***************************************************************

*--------------------------------------------------------------
* 1. Get UNIQUE locality_country in R9
*--------------------------------------------------------------
use "${data_new}/R9_c.dta", clear

* Reduce the respondent file to its distinct match keys:
* only the key variable is kept, then repeated keys are removed
keep locality_country1
duplicates drop locality_country1, force

* Row number serves as the master-side ID that matchit requires
generate locid1 = _n

save "${data_new}/R9_loc_unique.dta", replace


*--------------------------------------------------------------
* 2. Get UNIQUE locality_country in ACLED
*--------------------------------------------------------------
use "${data_new}/acled_c.dta", clear

* Keep the string + event id
keep id2 locality_country2

* Reduce to ONE row per locality string, so the file really is unique and
* matchit compares localities rather than every single event.
* The event with the lowest id2 represents its locality; sorting on id2
* inside each group makes that choice reproducible from run to run.
* NOTE(review): the representative event is picked by file order only.
* The fieldwork-window filter is applied later, after matching, so a
* locality whose representative event lies outside the window is dropped
* even if it has other events inside it - decide whether that is intended.
bysort locality_country2 (id2): keep if _n == 1

* ID required by matchit (for the using side)
gen locid2 = _n

save "${data_new}/acled_loc_unique.dta", replace


*--------------------------------------------------------------
* 3. Fuzzy match on unique localities (MUCH smaller)
*--------------------------------------------------------------
* Install the user-written commands if they are missing. matchit relies on
* the companion command freqindex, which is a separate SSC package, so both
* are checked; checking matchit alone can leave a fresh machine failing
* inside matchit.
foreach pkg in matchit freqindex {
    capture which `pkg'
    if _rc ssc install `pkg'
}

use "${data_new}/R9_loc_unique.dta", clear

* Master side: locid1 / locality_country1; using side: locid2 / locality_country2.
* Only candidate pairs with a similarity score of at least 0.90 are returned.
matchit locid1 locality_country1 using "${data_new}/acled_loc_unique.dta", ///
    idu(locid2) txtu(locality_country2) threshold(0.90)

save "${data_new}/loc_match.dta", replace


*--------------------------------------------------------------
* 4. Bring id2 back to locality level, keep BEST match per locality
*--------------------------------------------------------------
use "${data_new}/loc_match.dta", clear

* Attach id2 from the ACLED locality file
merge m:1 locid2 using "${data_new}/acled_loc_unique.dta", keep(match) nogen

* If matchit returns multiple candidate matches per locid1, keep the one
* with the highest similarity score. Candidates that tie on the top score
* are resolved by the lowest id2: a plain -sort locid1 similscore- leaves
* the order of tied rows arbitrary, so the retained id2 could change from
* one run to the next.
bysort locid1 (similscore): keep if similscore == similscore[_N]
bysort locid1 (id2): keep if _n == 1

* Keep only what we need to merge back to individuals
keep locality_country1 id2 similscore

save "${data_new}/loc_to_id2.dta", replace


*--------------------------------------------------------------
* 5. Merge event ID back to full R9 data
*--------------------------------------------------------------
use "${data_new}/R9_c.dta", clear

* Every respondent inherits the id2 (and similscore) of his or her locality.
* Respondents whose locality found no match are kept, with id2 missing.
merge m:1 locality_country1 using "${data_new}/loc_to_id2.dta", ///
    nogenerate keep(master match)

* Respondent file carrying the matched ACLED event ID
save "${data_new}/R9_with_id2.dta", replace


***************************************************************
* 6. FINAL MERGE: R9 + ACLED EVENTS
***************************************************************

* Start from respondent data with matched id2
use "${data_new}/R9_with_id2.dta", clear

* Merge ACLED information by id2. keep(match) also drops respondents whose
* locality had no ACLED match (their id2 is missing).
merge m:1 id2 using "${data_new}/acled_c.dta", keep(match) nogen

* Restrict to Afrobarometer fieldwork window.
* scalar() forces Stata to read the stored scalars; a bare name would be
* resolved as a variable first if the merged data happened to contain one.
keep if inrange(eventdate, scalar(field_start), scalar(field_end))


* Time difference between interview and matched event (days)
gen tdiff = intervdate - eventdate if !missing(intervdate) & !missing(eventdate)
label var tdiff "Days between interview and matched event"
summ tdiff

* Post-event indicator (example): same-day interviews (tdiff == 0) count as 0
gen time_zero = (tdiff > 0) if !missing(tdiff)
label var time_zero "Interview after matched event (1 = yes)"

* Survey year (for FE) – create only if missing.
* ", exact" is required: by default -confirm variable- also accepts
* abbreviations, so another variable whose name merely begins with
* "surveyyear" would make the check pass and the variable would never be made.
capture confirm variable surveyyear, exact
if _rc {
    gen surveyyear = year(DATEINTR)
    label var surveyyear "Survey year (from DATEINTR)"
}

* 'year' variable for country-year merges – create only if missing.
* surveyyear is guaranteed to exist at this point (see the step above).
capture confirm variable year, exact
if _rc {
    gen year = surveyyear
}

* Numeric country ID (if not already there)
* NOTE(review): -encode- numbers the countries alphabetically among those
* present in THIS file. Verify that these codes coincide with cntrynum in
* vdem.dta / macro.dta before relying on the country-year merge.
capture confirm variable cntrynum, exact
if _rc {
    encode country, gen(cntrynum)
}

save "${data_new}/final_d.dta", replace


***************************************************************
* 5. MACRO-LEVEL MERGES (V-DEM + HDI)
***************************************************************

*--------------------------------------------------------------
* 5.1 Build libhdi.dta from vdem + macro
*--------------------------------------------------------------

* V-Dem rows are the master; macro.dta contributes one row per cntrynum
use "${data}/vdem.dta", clear
merge m:1 cntrynum using "${data}/macro.dta"

* Retain only rows found in both files, then discard the merge flag
drop if _merge != 3
drop _merge

* Shorter name for the HDI column
rename HumanDevelopmentIndexHDI hdi
label variable hdi "Human Development Index"

save "${data_new}/libhdi.dta", replace


*--------------------------------------------------------------
* 5.2 Merge macro data (HDI + V-Dem) into micro data
*--------------------------------------------------------------
use "${data_new}/final_d.dta", clear

* Drop any existing merge flag, regardless of origin
capture drop _merge

* Merge with the macro file, which must be unique by cntrynum-year
* (merge m:1 stops with an error if it is not)
merge m:1 cntrynum year using "${data_new}/libhdi.dta", gen(merge_hdi)

* Keep only matched country-years (optional but recommended)
keep if merge_hdi == 3
drop merge_hdi

* NOTE(review): this is the only generated file written into the raw-data
* folder ${data}; every other output goes to ${data_new}. The path is left
* unchanged because other do-files may read it from here - consider moving it.
save "${data}/R9_final.dta", replace
ta cntrynum

* Close the log opened at the top of this file (named "rep") so that the
* log file is complete and released when the script ends
capture log close rep
***************************************************************
* END OF DO-FILE
***************************************************************
